package com.cshuig.hadoop.mapreduce.phoneflow;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * Driver that configures and submits the phone-flow counting MapReduce job.
 *
 * <p>The job uses {@code CountMapper} and {@code CountReducer}, with {@code Text} keys and
 * {@code DataCount} values for both the map output and the final output. Map output is routed
 * to the reduce tasks by {@code PhonePatitioner}.
 *
 * <p>Created by hogan on 2015/10/28.
 */
public class CountMain {
    /** Input file read by the job. */
    private static final String INPUT_PATH = "/HTTP_20130313143750.dat";

    /** Directory the job writes its result files to. */
    private static final String OUTPUT_PATH = "/phone_count_result";

    /**
     * Number of reduce tasks.
     * NOTE(review): presumably this must cover every partition index that
     * {@code PhonePatitioner} can return; the partitioner is not visible here, so confirm.
     */
    private static final int NUM_REDUCE_TASKS = 4;

    /**
     * Builds the job, submits it, and blocks until it finishes, printing progress.
     *
     * <p>If the job does not complete successfully the JVM exits with status 1, so that
     * calling scripts and schedulers can detect the failure. On success the method returns
     * normally.
     *
     * @param args command-line arguments; currently unused
     * @throws IOException            if the job cannot be created or submitted
     * @throws ClassNotFoundException if a class configured on the job cannot be found
     * @throws InterruptedException   if the thread is interrupted while waiting for the job
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(CountMain.class);

        // Map side.
        job.setMapperClass(CountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(DataCount.class);
        FileInputFormat.setInputPaths(job, new Path(INPUT_PATH));

        // Reduce side.
        job.setReducerClass(CountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DataCount.class);
        FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));

        /*
         * Important:
         *  1. If the number of reducers is less than the number of partitions, the job fails
         *     at runtime.
         *  2. If the number of reducers is greater than the number of partitions, the job does
         *     not fail, but each surplus reducer still produces an empty output file.
         */
        // Set the number of reducers.
        job.setNumReduceTasks(NUM_REDUCE_TASKS);
        // Set the partitioner that distributes map output across the reducers.
        job.setPartitionerClass(PhonePatitioner.class);

        // waitForCompletion returns false when the job fails; surface that as a
        // non-zero exit status instead of silently exiting with 0.
        boolean succeeded = job.waitForCompletion(true);
        if (!succeeded) {
            System.exit(1);
        }
    }
}
